# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------

# Autogenerated By   : src/main/python/generator/generator.py
# Autogenerated From : scripts/builtin/topk_cleaning.dml

from typing import Dict, Iterable

from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
from systemds.utils.consts import VALID_INPUT_TYPES


def topk_cleaning(dataTrain: Frame,
                  primitives: Frame,
                  parameters: Frame,
                  evaluationFunc: str,
                  evalFunHp: Matrix,
                  **kwargs: Dict[str, VALID_INPUT_TYPES]):
    """
    Create the multi-return operation node for the ``topk_cleaning`` builtin
    (generated from scripts/builtin/topk_cleaning.dml).

    The five explicit arguments and every entry of ``kwargs`` are handed to the
    builtin as named inputs. Six output placeholder nodes are created and
    attached to the returned node.

    Explicit parameters:

    :param dataTrain: Training set
    :param primitives: Library of primitive cleaning operators
    :param parameters: Hyperparameter search space that matches the primitives
    :param evaluationFunc: Name of a SystemDS DML function that scores a pipeline
    :param evalFunHp: Hyperparameter matrix for the above evaluation function

    Further named arguments of the builtin, supplied through ``kwargs``:

    :param dataTest: Test set, ignored when cv is set to True
    :param metaData: 3 x n frame; metaData[1] stores the mask, metaData[2] the
        schema and metaData[3] the FD mask for dataTrain
    :param refSol: Reference solution
    :param topK: Number of best pipelines to return
    :param resource_val: Maximum resource R for the Bandit search
    :param max_iter: Maximum iterations while enumerating logical pipelines
    :param lq: Lower quantile used by utils::doErrorSample when triggered
    :param uq: Upper quantile used by utils::doErrorSample when triggered
    :param sample: Fraction of rows to subsample from dataTrain
    :param expectedIncrease: Minimum improvement over dirtyScore that a
        candidate must deliver
    :param seed: Seed number
    :param cv: TRUE means k-fold CV, FALSE means hold-out split
    :param cvk: Number of folds if cv = TRUE
    :param isLastLabel: TRUE if the last column is the label
    :param rowCount: Row-count threshold above which doErrorSample may replace
        uniform sampling
    :param correctTypos: Run spelling correction in the string preprocessing step
    :param enablePruning: Enable pruning inside the Bandit phase

    The returned ``MultiReturn`` node carries six outputs, in this order:

    :return: (Frame) K cleaned-data pipelines
    :return: (Matrix) Hyperparameter matrix with rows aligning with topKPipelines
    :return: (Matrix) Evaluation scores with rows aligning with topKPipelines
    :return: (Scalar) Baseline score on the unclean data
    :return: (Matrix) Updated evaluation function hyperparameters
    :return: (Frame) Frame of "apply" functions for deploying each of the
        top-K pipelines
    """

    context = dataTrain.sds_context

    # Explicit arguments come first; kwargs follow and win on duplicate keys.
    named_inputs = {
        'dataTrain': dataTrain,
        'primitives': primitives,
        'parameters': parameters,
        'evaluationFunc': evaluationFunc,
        'evalFunHp': evalFunHp,
        **kwargs,
    }

    # One placeholder per builtin output; the tuple order fixes the output order.
    output_kinds = (Frame, Matrix, Matrix, Scalar, Matrix, Frame)
    placeholders = [make_node(context, '') for make_node in output_kinds]

    multi_op = MultiReturn(context, 'topk_cleaning', placeholders, named_input_nodes=named_inputs)

    # Every placeholder gets its own single-element list naming the
    # multi-return node as its input.
    for placeholder in placeholders:
        placeholder._unnamed_input_nodes = [multi_op]

    return multi_op
